{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load modules\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load example datasets\n",
    "drinks = pd.read_csv('./drinksbycountry.csv')\n",
    "movies = pd.read_csv('./imdbratings.csv')\n",
    "orders = pd.read_csv('./chiporders.csv', sep='\\t')\n",
    "orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')\n",
    "stocks = pd.read_csv('./smallstocks.csv', parse_dates=['Date'])\n",
    "titanic = pd.read_csv('./kaggletrain.csv')\n",
    "ufo = pd.read_csv('./uforeports.csv', parse_dates=['Time'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.23.4'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "INSTALLED VERSIONS\n",
      "------------------\n",
      "commit: None\n",
      "python: 3.6.3.final.0\n",
      "python-bits: 64\n",
      "OS: Darwin\n",
      "OS-release: 18.7.0\n",
      "machine: x86_64\n",
      "processor: i386\n",
      "byteorder: little\n",
      "LC_ALL: None\n",
      "LANG: None\n",
      "LOCALE: zh_CN.UTF-8\n",
      "\n",
      "pandas: 0.23.4\n",
      "pytest: 5.1.3\n",
      "pip: 10.0.1\n",
      "setuptools: 39.1.0\n",
      "Cython: 0.29.13\n",
      "numpy: 1.16.2\n",
      "scipy: 1.2.0\n",
      "pyarrow: None\n",
      "xarray: None\n",
      "IPython: 7.12.0\n",
      "sphinx: 2.2.0\n",
      "patsy: 0.5.1\n",
      "dateutil: 2.7.5\n",
      "pytz: 2018.9\n",
      "blosc: None\n",
      "bottleneck: None\n",
      "tables: None\n",
      "numexpr: None\n",
      "feather: None\n",
      "matplotlib: 2.2.4\n",
      "openpyxl: 2.6.2\n",
      "xlrd: 1.2.0\n",
      "xlwt: 1.3.0\n",
      "xlsxwriter: None\n",
      "lxml: 4.3.3\n",
      "bs4: 4.7.1\n",
      "html5lib: None\n",
      "sqlalchemy: 1.3.4\n",
      "pymysql: 0.9.3\n",
      "psycopg2: None\n",
      "jinja2: 2.10.3\n",
      "s3fs: None\n",
      "fastparquet: None\n",
      "pandas_gbq: None\n",
      "pandas_datareader: None\n"
     ]
    }
   ],
   "source": [
    "pd.show_versions()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col one</th>\n",
       "      <th>col two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col one  col two\n",
       "0      100      300\n",
       "1      200      400"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'col one':[100, 200], 'col two':[300, 400]})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.145522</td>\n",
       "      <td>0.335658</td>\n",
       "      <td>0.535959</td>\n",
       "      <td>0.779452</td>\n",
       "      <td>0.084929</td>\n",
       "      <td>0.120408</td>\n",
       "      <td>0.161779</td>\n",
       "      <td>0.528535</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.243576</td>\n",
       "      <td>0.530217</td>\n",
       "      <td>0.666263</td>\n",
       "      <td>0.910802</td>\n",
       "      <td>0.118165</td>\n",
       "      <td>0.359488</td>\n",
       "      <td>0.048745</td>\n",
       "      <td>0.096896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.022355</td>\n",
       "      <td>0.242366</td>\n",
       "      <td>0.992369</td>\n",
       "      <td>0.501184</td>\n",
       "      <td>0.073475</td>\n",
       "      <td>0.070612</td>\n",
       "      <td>0.720375</td>\n",
       "      <td>0.011724</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.112318</td>\n",
       "      <td>0.693710</td>\n",
       "      <td>0.405520</td>\n",
       "      <td>0.403132</td>\n",
       "      <td>0.388442</td>\n",
       "      <td>0.586789</td>\n",
       "      <td>0.312562</td>\n",
       "      <td>0.676533</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4         5         6  \\\n",
       "0  0.145522  0.335658  0.535959  0.779452  0.084929  0.120408  0.161779   \n",
       "1  0.243576  0.530217  0.666263  0.910802  0.118165  0.359488  0.048745   \n",
       "2  0.022355  0.242366  0.992369  0.501184  0.073475  0.070612  0.720375   \n",
       "3  0.112318  0.693710  0.405520  0.403132  0.388442  0.586789  0.312562   \n",
       "\n",
       "          7  \n",
       "0  0.528535  \n",
       "1  0.096896  \n",
       "2  0.011724  \n",
       "3  0.676533  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(np.random.rand(4, 8))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "      <th>f</th>\n",
       "      <th>g</th>\n",
       "      <th>h</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.300741</td>\n",
       "      <td>0.714298</td>\n",
       "      <td>0.523294</td>\n",
       "      <td>0.121821</td>\n",
       "      <td>0.483656</td>\n",
       "      <td>0.497611</td>\n",
       "      <td>0.736720</td>\n",
       "      <td>0.421708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.304452</td>\n",
       "      <td>0.566237</td>\n",
       "      <td>0.863731</td>\n",
       "      <td>0.846776</td>\n",
       "      <td>0.620437</td>\n",
       "      <td>0.582564</td>\n",
       "      <td>0.535506</td>\n",
       "      <td>0.833922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.382465</td>\n",
       "      <td>0.408305</td>\n",
       "      <td>0.361108</td>\n",
       "      <td>0.355661</td>\n",
       "      <td>0.862713</td>\n",
       "      <td>0.277821</td>\n",
       "      <td>0.293187</td>\n",
       "      <td>0.181719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.362419</td>\n",
       "      <td>0.245652</td>\n",
       "      <td>0.465784</td>\n",
       "      <td>0.455508</td>\n",
       "      <td>0.546289</td>\n",
       "      <td>0.178675</td>\n",
       "      <td>0.566118</td>\n",
       "      <td>0.047852</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          a         b         c         d         e         f         g  \\\n",
       "0  0.300741  0.714298  0.523294  0.121821  0.483656  0.497611  0.736720   \n",
       "1  0.304452  0.566237  0.863731  0.846776  0.620437  0.582564  0.535506   \n",
       "2  0.382465  0.408305  0.361108  0.355661  0.862713  0.277821  0.293187   \n",
       "3  0.362419  0.245652  0.465784  0.455508  0.546289  0.178675  0.566118   \n",
       "\n",
       "          h  \n",
       "0  0.421708  \n",
       "1  0.833922  \n",
       "2  0.181719  \n",
       "3  0.047852  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(np.random.rand(4, 8), columns=list('abcdefgh'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col one</th>\n",
       "      <th>col two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col one  col two\n",
       "0      100      300\n",
       "1      200      400"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.rename({'col one':'col_one', 'col two':'col_two'}, axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns = ['col_one', 'col_two']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns = df.columns.str.replace(' ', '_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one</th>\n",
       "      <th>col_two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col_one  col_two\n",
       "0      100      300\n",
       "1      200      400"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>X_col_one</th>\n",
       "      <th>X_col_two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   X_col_one  X_col_two\n",
       "0        100        300\n",
       "1        200        400"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.add_prefix('X_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one_Y</th>\n",
       "      <th>col_two_Y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>200</td>\n",
       "      <td>400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col_one_Y  col_two_Y\n",
       "0        100        300\n",
       "1        200        400"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.add_suffix('_Y')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0  Afghanistan              0                0              0   \n",
       "1      Albania             89              132             54   \n",
       "2      Algeria             25                0             14   \n",
       "3      Andorra            245              138            312   \n",
       "4       Angola            217               57             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol continent  \n",
       "0                           0.0      Asia  \n",
       "1                           4.9    Europe  \n",
       "2                           0.7    Africa  \n",
       "3                          12.4    Europe  \n",
       "4                           5.9    Africa  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>64</td>\n",
       "      <td>18</td>\n",
       "      <td>4</td>\n",
       "      <td>4.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>Zambia</td>\n",
       "      <td>32</td>\n",
       "      <td>19</td>\n",
       "      <td>4</td>\n",
       "      <td>2.5</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>Yemen</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>Vietnam</td>\n",
       "      <td>111</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>Venezuela</td>\n",
       "      <td>333</td>\n",
       "      <td>100</td>\n",
       "      <td>3</td>\n",
       "      <td>7.7</td>\n",
       "      <td>South America</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "192   Zimbabwe             64               18              4   \n",
       "191     Zambia             32               19              4   \n",
       "190      Yemen              6                0              0   \n",
       "189    Vietnam            111                2              1   \n",
       "188  Venezuela            333              100              3   \n",
       "\n",
       "     total_litres_of_pure_alcohol      continent  \n",
       "192                           4.7         Africa  \n",
       "191                           2.5         Africa  \n",
       "190                           0.1           Asia  \n",
       "189                           2.0           Asia  \n",
       "188                           7.7  South America  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.loc[::-1].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>64</td>\n",
       "      <td>18</td>\n",
       "      <td>4</td>\n",
       "      <td>4.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Zambia</td>\n",
       "      <td>32</td>\n",
       "      <td>19</td>\n",
       "      <td>4</td>\n",
       "      <td>2.5</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Yemen</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Vietnam</td>\n",
       "      <td>111</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Venezuela</td>\n",
       "      <td>333</td>\n",
       "      <td>100</td>\n",
       "      <td>3</td>\n",
       "      <td>7.7</td>\n",
       "      <td>South America</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0   Zimbabwe             64               18              4   \n",
       "1     Zambia             32               19              4   \n",
       "2      Yemen              6                0              0   \n",
       "3    Vietnam            111                2              1   \n",
       "4  Venezuela            333              100              3   \n",
       "\n",
       "   total_litres_of_pure_alcohol      continent  \n",
       "0                           4.7         Africa  \n",
       "1                           2.5         Africa  \n",
       "2                           0.1           Asia  \n",
       "3                           2.0           Asia  \n",
       "4                           7.7  South America  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.loc[::-1].reset_index(drop=True).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>continent</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Asia</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Afghanistan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Europe</td>\n",
       "      <td>4.9</td>\n",
       "      <td>54</td>\n",
       "      <td>132</td>\n",
       "      <td>89</td>\n",
       "      <td>Albania</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Africa</td>\n",
       "      <td>0.7</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>Algeria</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Europe</td>\n",
       "      <td>12.4</td>\n",
       "      <td>312</td>\n",
       "      <td>138</td>\n",
       "      <td>245</td>\n",
       "      <td>Andorra</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Africa</td>\n",
       "      <td>5.9</td>\n",
       "      <td>45</td>\n",
       "      <td>57</td>\n",
       "      <td>217</td>\n",
       "      <td>Angola</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  continent  total_litres_of_pure_alcohol  wine_servings  spirit_servings  \\\n",
       "0      Asia                           0.0              0                0   \n",
       "1    Europe                           4.9             54              132   \n",
       "2    Africa                           0.7             14                0   \n",
       "3    Europe                          12.4            312              138   \n",
       "4    Africa                           5.9             45               57   \n",
       "\n",
       "   beer_servings      country  \n",
       "0              0  Afghanistan  \n",
       "1             89      Albania  \n",
       "2             25      Algeria  \n",
       "3            245      Andorra  \n",
       "4            217       Angola  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.loc[:, ::-1].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "country                          object\n",
       "beer_servings                     int64\n",
       "spirit_servings                   int64\n",
       "wine_servings                     int64\n",
       "total_litres_of_pure_alcohol    float64\n",
       "continent                        object\n",
       "dtype: object"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   beer_servings  spirit_servings  wine_servings  total_litres_of_pure_alcohol\n",
       "0              0                0              0                           0.0\n",
       "1             89              132             54                           4.9\n",
       "2             25                0             14                           0.7\n",
       "3            245              138            312                          12.4\n",
       "4            217               57             45                           5.9"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.select_dtypes(include='number').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country continent\n",
       "0  Afghanistan      Asia\n",
       "1      Albania    Europe\n",
       "2      Algeria    Africa\n",
       "3      Andorra    Europe\n",
       "4       Angola    Africa"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.select_dtypes(include='object').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0  Afghanistan              0                0              0   \n",
       "1      Albania             89              132             54   \n",
       "2      Algeria             25                0             14   \n",
       "3      Andorra            245              138            312   \n",
       "4       Angola            217               57             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol continent  \n",
       "0                           0.0      Asia  \n",
       "1                           4.9    Europe  \n",
       "2                           0.7    Africa  \n",
       "3                          12.4    Europe  \n",
       "4                           5.9    Africa  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.select_dtypes(include=['number', 'object', 'category', 'datetime']).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country continent\n",
       "0  Afghanistan      Asia\n",
       "1      Albania    Europe\n",
       "2      Algeria    Africa\n",
       "3      Andorra    Europe\n",
       "4       Angola    Africa"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks.select_dtypes(exclude='number').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one</th>\n",
       "      <th>col_two</th>\n",
       "      <th>col_three</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.1</td>\n",
       "      <td>4.4</td>\n",
       "      <td>7.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.2</td>\n",
       "      <td>5.5</td>\n",
       "      <td>8.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.3</td>\n",
       "      <td>6.6</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  col_one col_two col_three\n",
       "0     1.1     4.4       7.7\n",
       "1     2.2     5.5       8.8\n",
       "2     3.3     6.6         -"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'col_one':['1.1', '2.2', '3.3'],\n",
    "                   'col_two':['4.4', '5.5', '6.6'],\n",
    "                   'col_three':['7.7', '8.8', '-']})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "col_one      object\n",
       "col_two      object\n",
       "col_three    object\n",
       "dtype: object"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "col_one      float64\n",
       "col_two      float64\n",
       "col_three     object\n",
       "dtype: object"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.astype({'col_one':'float', 'col_two':'float'}).dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    7.7\n",
       "1    8.8\n",
       "2    NaN\n",
       "Name: col_three, dtype: float64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_numeric(df.col_three, errors='coerce')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    7.7\n",
       "1    8.8\n",
       "2    0.0\n",
       "Name: col_three, dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.to_numeric(df.col_three, errors='coerce').fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one</th>\n",
       "      <th>col_two</th>\n",
       "      <th>col_three</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.1</td>\n",
       "      <td>4.4</td>\n",
       "      <td>7.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2.2</td>\n",
       "      <td>5.5</td>\n",
       "      <td>8.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3.3</td>\n",
       "      <td>6.6</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   col_one  col_two  col_three\n",
       "0      1.1      4.4        7.7\n",
       "1      2.2      5.5        8.8\n",
       "2      3.3      6.6        0.0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.apply(pd.to_numeric, errors='coerce').fillna(0)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "col_one      float64\n",
       "col_two      float64\n",
       "col_three    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 193 entries, 0 to 192\n",
      "Data columns (total 6 columns):\n",
      "country                         193 non-null object\n",
      "beer_servings                   193 non-null int64\n",
      "spirit_servings                 193 non-null int64\n",
      "wine_servings                   193 non-null int64\n",
      "total_litres_of_pure_alcohol    193 non-null float64\n",
      "continent                       193 non-null object\n",
      "dtypes: float64(1), int64(3), object(2)\n",
      "memory usage: 30.4 KB\n"
     ]
    }
   ],
   "source": [
    "drinks.info(memory_usage='deep')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 193 entries, 0 to 192\n",
      "Data columns (total 2 columns):\n",
      "beer_servings    193 non-null int64\n",
      "continent        193 non-null object\n",
      "dtypes: int64(1), object(1)\n",
      "memory usage: 13.6 KB\n"
     ]
    }
   ],
   "source": [
    "cols = ['beer_servings', 'continent']\n",
    "small_drinks = pd.read_csv('./drinksbycountry.csv', usecols=cols)\n",
    "small_drinks.info(memory_usage='deep')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 193 entries, 0 to 192\n",
      "Data columns (total 2 columns):\n",
      "beer_servings    193 non-null int64\n",
      "continent        193 non-null category\n",
      "dtypes: category(1), int64(1)\n",
      "memory usage: 2.3 KB\n"
     ]
    }
   ],
   "source": [
    "dtypes = {'continent':'category'}\n",
    "smaller_drinks = pd.read_csv('./drinksbycountry.csv', usecols=cols, dtype=dtypes)\n",
    "smaller_drinks.info(memory_usage='deep')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>89</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>25</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>245</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>217</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   beer_servings continent\n",
       "0              0      Asia\n",
       "1             89    Europe\n",
       "2             25    Africa\n",
       "3            245    Europe\n",
       "4            217    Africa"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smaller_drinks.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "beer_servings       int64\n",
       "continent        category\n",
       "dtype: object"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smaller_drinks.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>31.50</td>\n",
       "      <td>14070500</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>112.52</td>\n",
       "      <td>21701800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>57.42</td>\n",
       "      <td>19189500</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date   Close    Volume Symbol\n",
       "0  2016-10-03   31.50  14070500   CSCO\n",
       "1  2016-10-03  112.52  21701800   AAPL\n",
       "2  2016-10-03   57.42  19189500   MSFT"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./stocks1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>113.00</td>\n",
       "      <td>29736800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>57.24</td>\n",
       "      <td>20085900</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>31.35</td>\n",
       "      <td>18460400</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date   Close    Volume Symbol\n",
       "0  2016-10-04  113.00  29736800   AAPL\n",
       "1  2016-10-04   57.24  20085900   MSFT\n",
       "2  2016-10-04   31.35  18460400   CSCO"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./stocks2.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>57.64</td>\n",
       "      <td>16726400</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>31.59</td>\n",
       "      <td>11808600</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>113.05</td>\n",
       "      <td>21453100</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date   Close    Volume Symbol\n",
       "0  2016-10-05   57.64  16726400   MSFT\n",
       "1  2016-10-05   31.59  11808600   CSCO\n",
       "2  2016-10-05  113.05  21453100   AAPL"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./stocks3.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['stocks1.csv', 'stocks2.csv', 'stocks3.csv']"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "stock_files = sorted(glob('stocks*.csv'))\n",
    "stock_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>31.50</td>\n",
       "      <td>14070500</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>112.52</td>\n",
       "      <td>21701800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>57.42</td>\n",
       "      <td>19189500</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>113.00</td>\n",
       "      <td>29736800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>57.24</td>\n",
       "      <td>20085900</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>31.35</td>\n",
       "      <td>18460400</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>57.64</td>\n",
       "      <td>16726400</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>31.59</td>\n",
       "      <td>11808600</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>113.05</td>\n",
       "      <td>21453100</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date   Close    Volume Symbol\n",
       "0  2016-10-03   31.50  14070500   CSCO\n",
       "1  2016-10-03  112.52  21701800   AAPL\n",
       "2  2016-10-03   57.42  19189500   MSFT\n",
       "0  2016-10-04  113.00  29736800   AAPL\n",
       "1  2016-10-04   57.24  20085900   MSFT\n",
       "2  2016-10-04   31.35  18460400   CSCO\n",
       "0  2016-10-05   57.64  16726400   MSFT\n",
       "1  2016-10-05   31.59  11808600   CSCO\n",
       "2  2016-10-05  113.05  21453100   AAPL"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat((pd.read_csv(file) for file in stock_files))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>31.50</td>\n",
       "      <td>14070500</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>112.52</td>\n",
       "      <td>21701800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>57.42</td>\n",
       "      <td>19189500</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>113.00</td>\n",
       "      <td>29736800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>57.24</td>\n",
       "      <td>20085900</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>31.35</td>\n",
       "      <td>18460400</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>57.64</td>\n",
       "      <td>16726400</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>31.59</td>\n",
       "      <td>11808600</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>113.05</td>\n",
       "      <td>21453100</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Date   Close    Volume Symbol\n",
       "0  2016-10-03   31.50  14070500   CSCO\n",
       "1  2016-10-03  112.52  21701800   AAPL\n",
       "2  2016-10-03   57.42  19189500   MSFT\n",
       "3  2016-10-04  113.00  29736800   AAPL\n",
       "4  2016-10-04   57.24  20085900   MSFT\n",
       "5  2016-10-04   31.35  18460400   CSCO\n",
       "6  2016-10-05   57.64  16726400   MSFT\n",
       "7  2016-10-05   31.59  11808600   CSCO\n",
       "8  2016-10-05  113.05  21453100   AAPL"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat((pd.read_csv(file) for file in stock_files), ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings\n",
       "0  Afghanistan              0                0\n",
       "1      Albania             89              132\n",
       "2      Algeria             25                0\n",
       "3      Andorra            245              138\n",
       "4       Angola            217               57"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./drinks1.csv').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   wine_servings  total_litres_of_pure_alcohol continent\n",
       "0              0                           0.0      Asia\n",
       "1             54                           4.9    Europe\n",
       "2             14                           0.7    Africa\n",
       "3            312                          12.4    Europe\n",
       "4             45                           5.9    Africa"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv('./drinks2.csv').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0  Afghanistan              0                0              0   \n",
       "1      Albania             89              132             54   \n",
       "2      Algeria             25                0             14   \n",
       "3      Andorra            245              138            312   \n",
       "4       Angola            217               57             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol continent      country  beer_servings  \\\n",
       "0                           0.0      Asia  Afghanistan              0   \n",
       "1                           4.9    Europe      Albania             89   \n",
       "2                           0.7    Africa      Algeria             25   \n",
       "3                          12.4    Europe      Andorra            245   \n",
       "4                           5.9    Africa       Angola            217   \n",
       "\n",
       "   spirit_servings  wine_servings  total_litres_of_pure_alcohol continent  \n",
       "0                0              0                           0.0      Asia  \n",
       "1              132             54                           4.9    Europe  \n",
       "2                0             14                           0.7    Africa  \n",
       "3              138            312                          12.4    Europe  \n",
       "4               57             45                           5.9    Africa  "
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drink_files = sorted(glob('./drinks*.csv'))\n",
    "pd.concat((pd.read_csv(file) for file in drink_files), axis='columns').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Column A</th>\n",
       "      <th>Column B</th>\n",
       "      <th>Column C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>4.4</td>\n",
       "      <td>seven</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>5.5</td>\n",
       "      <td>eight</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>6.6</td>\n",
       "      <td>nine</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Column A  Column B Column C\n",
       "0         1       4.4    seven\n",
       "1         2       5.5    eight\n",
       "2         3       6.6     nine"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_clipboard()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Column A      int64\n",
       "Column B    float64\n",
       "Column C     object\n",
       "dtype: object"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Left</th>\n",
       "      <th>Right</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Alice</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bob</th>\n",
       "      <td>20</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Charlie</th>\n",
       "      <td>30</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Left  Right\n",
       "Alice      10     40\n",
       "Bob        20     50\n",
       "Charlie    30     60"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_clipboard()\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Alice', 'Bob', 'Charlie'], dtype='object')"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "979"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(movies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "movies_1 = movies.sample(frac=0.75, random_state=1234)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "movies_2 = movies.drop(movies_1.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "979"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(movies_1) + len(movies_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([  0,   2,   5,   6,   7,   8,   9,  11,  13,  16,\n",
       "            ...\n",
       "            966, 967, 969, 971, 972, 974, 975, 976, 977, 978],\n",
       "           dtype='int64', length=734)"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_1.index.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([  1,   3,   4,  10,  12,  14,  15,  18,  26,  30,\n",
       "            ...\n",
       "            931, 934, 937, 941, 950, 954, 960, 968, 970, 973],\n",
       "           dtype='int64', length=245)"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies_2.index.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                     title content_rating   genre  duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Crime', 'Action', 'Drama', 'Western', 'Adventure', 'Biography',\n",
       "       'Comedy', 'Animation', 'Mystery', 'Horror', 'Film-Noir', 'Sci-Fi',\n",
       "       'History', 'Thriller', 'Family', 'Fantasy'], dtype=object)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies.genre.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.9</td>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>96</td>\n",
       "      <td>[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8.9</td>\n",
       "      <td>The Good, the Bad and the Ugly</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Western</td>\n",
       "      <td>161</td>\n",
       "      <td>[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Fight Club</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>139</td>\n",
       "      <td>[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Inception</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>148</td>\n",
       "      <td>[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                           title content_rating    genre  \\\n",
       "3           9.0                 The Dark Knight          PG-13   Action   \n",
       "5           8.9                    12 Angry Men      NOT RATED    Drama   \n",
       "6           8.9  The Good, the Bad and the Ugly      NOT RATED  Western   \n",
       "9           8.9                      Fight Club              R    Drama   \n",
       "11          8.8                       Inception          PG-13   Action   \n",
       "\n",
       "    duration                                        actors_list  \n",
       "3        152  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "5         96  [u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...  \n",
       "6        161  [u'Clint Eastwood', u'Eli Wallach', u'Lee Van ...  \n",
       "9        139  [u'Brad Pitt', u'Edward Norton', u'Helena Bonh...  \n",
       "11       148  [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...  "
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[(movies.genre == 'Action') |\n",
    "       (movies.genre == 'Drama') |\n",
    "       (movies.genre == 'Western')].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.9</td>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>96</td>\n",
       "      <td>[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8.9</td>\n",
       "      <td>The Good, the Bad and the Ugly</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Western</td>\n",
       "      <td>161</td>\n",
       "      <td>[u'Clint Eastwood', u'Eli Wallach', u'Lee Van ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Fight Club</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>139</td>\n",
       "      <td>[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Inception</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>148</td>\n",
       "      <td>[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                           title content_rating    genre  \\\n",
       "3           9.0                 The Dark Knight          PG-13   Action   \n",
       "5           8.9                    12 Angry Men      NOT RATED    Drama   \n",
       "6           8.9  The Good, the Bad and the Ugly      NOT RATED  Western   \n",
       "9           8.9                      Fight Club              R    Drama   \n",
       "11          8.8                       Inception          PG-13   Action   \n",
       "\n",
       "    duration                                        actors_list  \n",
       "3        152  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "5         96  [u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...  \n",
       "6        161  [u'Clint Eastwood', u'Eli Wallach', u'Lee Van ...  \n",
       "9        139  [u'Brad Pitt', u'Edward Norton', u'Helena Bonh...  \n",
       "11       148  [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[movies.genre.isin(['Action', 'Drama', 'Western'])].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8.9</td>\n",
       "      <td>The Lord of the Rings: The Return of the King</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Adventure</td>\n",
       "      <td>201</td>\n",
       "      <td>[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                                          title content_rating  \\\n",
       "0          9.3                       The Shawshank Redemption              R   \n",
       "1          9.2                                  The Godfather              R   \n",
       "2          9.1                         The Godfather: Part II              R   \n",
       "4          8.9                                   Pulp Fiction              R   \n",
       "7          8.9  The Lord of the Rings: The Return of the King          PG-13   \n",
       "\n",
       "       genre  duration                                        actors_list  \n",
       "0      Crime       142  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1      Crime       175    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2      Crime       200  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "4      Crime       154  [u'John Travolta', u'Uma Thurman', u'Samuel L....  \n",
       "7  Adventure       201  [u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[~movies.genre.isin(['Action', 'Drama', 'Western'])].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Drama        278\n",
       "Comedy       156\n",
       "Action       136\n",
       "Crime        124\n",
       "Biography     77\n",
       "Adventure     75\n",
       "Animation     62\n",
       "Horror        29\n",
       "Mystery       16\n",
       "Western        9\n",
       "Sci-Fi         5\n",
       "Thriller       5\n",
       "Film-Noir      3\n",
       "Family         2\n",
       "History        1\n",
       "Fantasy        1\n",
       "Name: genre, dtype: int64"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts = movies.genre.value_counts()\n",
    "counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Drama     278\n",
       "Comedy    156\n",
       "Action    136\n",
       "Name: genre, dtype: int64"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts.nlargest(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Drama', 'Comedy', 'Action'], dtype='object')"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "counts.nlargest(3).index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.9</td>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>96</td>\n",
       "      <td>[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Fight Club</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>139</td>\n",
       "      <td>[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Inception</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>148</td>\n",
       "      <td>[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Star Wars: Episode V - The Empire Strikes Back</td>\n",
       "      <td>PG</td>\n",
       "      <td>Action</td>\n",
       "      <td>124</td>\n",
       "      <td>[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                                           title  \\\n",
       "3           9.0                                 The Dark Knight   \n",
       "5           8.9                                    12 Angry Men   \n",
       "9           8.9                                      Fight Club   \n",
       "11          8.8                                       Inception   \n",
       "12          8.8  Star Wars: Episode V - The Empire Strikes Back   \n",
       "\n",
       "   content_rating   genre  duration  \\\n",
       "3           PG-13  Action       152   \n",
       "5       NOT RATED   Drama        96   \n",
       "9               R   Drama       139   \n",
       "11          PG-13  Action       148   \n",
       "12             PG  Action       124   \n",
       "\n",
       "                                          actors_list  \n",
       "3   [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "5   [u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...  \n",
       "9   [u'Brad Pitt', u'Edward Norton', u'Helena Bonh...  \n",
       "11  [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...  \n",
       "12  [u'Mark Hamill', u'Harrison Ford', u'Carrie Fi...  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movies[movies.genre.isin(counts.nlargest(3).index)].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Colors Reported</th>\n",
       "      <th>Shape Reported</th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ithaca</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TRIANGLE</td>\n",
       "      <td>NY</td>\n",
       "      <td>1930-06-01 22:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Willingboro</td>\n",
       "      <td>NaN</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>NJ</td>\n",
       "      <td>1930-06-30 20:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Holyoke</td>\n",
       "      <td>NaN</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>CO</td>\n",
       "      <td>1931-02-15 14:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Abilene</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DISK</td>\n",
       "      <td>KS</td>\n",
       "      <td>1931-06-01 13:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>New York Worlds Fair</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>NY</td>\n",
       "      <td>1933-04-18 19:00:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   City Colors Reported Shape Reported State  \\\n",
       "0                Ithaca             NaN       TRIANGLE    NY   \n",
       "1           Willingboro             NaN          OTHER    NJ   \n",
       "2               Holyoke             NaN           OVAL    CO   \n",
       "3               Abilene             NaN           DISK    KS   \n",
       "4  New York Worlds Fair             NaN          LIGHT    NY   \n",
       "\n",
       "                 Time  \n",
       "0 1930-06-01 22:00:00  \n",
       "1 1930-06-30 20:00:00  \n",
       "2 1931-02-15 14:00:00  \n",
       "3 1931-06-01 13:00:00  \n",
       "4 1933-04-18 19:00:00  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ufo.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "City                  25\n",
       "Colors Reported    15359\n",
       "Shape Reported      2644\n",
       "State                  0\n",
       "Time                   0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ufo.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "City               0.001371\n",
       "Colors Reported    0.842004\n",
       "Shape Reported     0.144948\n",
       "State              0.000000\n",
       "Time               0.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ufo.isna().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NY</td>\n",
       "      <td>1930-06-01 22:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NJ</td>\n",
       "      <td>1930-06-30 20:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>CO</td>\n",
       "      <td>1931-02-15 14:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>KS</td>\n",
       "      <td>1931-06-01 13:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NY</td>\n",
       "      <td>1933-04-18 19:00:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  State                Time\n",
       "0    NY 1930-06-01 22:00:00\n",
       "1    NJ 1930-06-30 20:00:00\n",
       "2    CO 1931-02-15 14:00:00\n",
       "3    KS 1931-06-01 13:00:00\n",
       "4    NY 1933-04-18 19:00:00"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ufo.dropna(axis='columns').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ithaca</td>\n",
       "      <td>NY</td>\n",
       "      <td>1930-06-01 22:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Willingboro</td>\n",
       "      <td>NJ</td>\n",
       "      <td>1930-06-30 20:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Holyoke</td>\n",
       "      <td>CO</td>\n",
       "      <td>1931-02-15 14:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Abilene</td>\n",
       "      <td>KS</td>\n",
       "      <td>1931-06-01 13:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>New York Worlds Fair</td>\n",
       "      <td>NY</td>\n",
       "      <td>1933-04-18 19:00:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   City State                Time\n",
       "0                Ithaca    NY 1930-06-01 22:00:00\n",
       "1           Willingboro    NJ 1930-06-30 20:00:00\n",
       "2               Holyoke    CO 1931-02-15 14:00:00\n",
       "3               Abilene    KS 1931-06-01 13:00:00\n",
       "4  New York Worlds Fair    NY 1933-04-18 19:00:00"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ufo.dropna(thresh=len(ufo)*0.9, axis='columns').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>location</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John Arthur Doe</td>\n",
       "      <td>Los Angeles, CA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jane Ann Smith</td>\n",
       "      <td>Washington, DC</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              name         location\n",
       "0  John Arthur Doe  Los Angeles, CA\n",
       "1   Jane Ann Smith   Washington, DC"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'name':['John Arthur Doe', 'Jane Ann Smith'],\n",
    "                   'location':['Los Angeles, CA', 'Washington, DC']})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John</td>\n",
       "      <td>Arthur</td>\n",
       "      <td>Doe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jane</td>\n",
       "      <td>Ann</td>\n",
       "      <td>Smith</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      0       1      2\n",
       "0  John  Arthur    Doe\n",
       "1  Jane     Ann  Smith"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.name.str.split(' ', expand=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>location</th>\n",
       "      <th>first</th>\n",
       "      <th>middle</th>\n",
       "      <th>last</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John Arthur Doe</td>\n",
       "      <td>Los Angeles, CA</td>\n",
       "      <td>John</td>\n",
       "      <td>Arthur</td>\n",
       "      <td>Doe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jane Ann Smith</td>\n",
       "      <td>Washington, DC</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Ann</td>\n",
       "      <td>Smith</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              name         location first  middle   last\n",
       "0  John Arthur Doe  Los Angeles, CA  John  Arthur    Doe\n",
       "1   Jane Ann Smith   Washington, DC  Jane     Ann  Smith"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[['first', 'middle', 'last']] = df.name.str.split(' ', expand=True)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>CA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Washington</td>\n",
       "      <td>DC</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             0   1\n",
       "0  Los Angeles  CA\n",
       "1   Washington  DC"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.location.str.split(', ', expand=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>location</th>\n",
       "      <th>first</th>\n",
       "      <th>middle</th>\n",
       "      <th>last</th>\n",
       "      <th>city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John Arthur Doe</td>\n",
       "      <td>Los Angeles, CA</td>\n",
       "      <td>John</td>\n",
       "      <td>Arthur</td>\n",
       "      <td>Doe</td>\n",
       "      <td>Los Angeles</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jane Ann Smith</td>\n",
       "      <td>Washington, DC</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Ann</td>\n",
       "      <td>Smith</td>\n",
       "      <td>Washington</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              name         location first  middle   last         city\n",
       "0  John Arthur Doe  Los Angeles, CA  John  Arthur    Doe  Los Angeles\n",
       "1   Jane Ann Smith   Washington, DC  Jane     Ann  Smith   Washington"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['city'] = df.location.str.split(', ', expand=True)[0]\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one</th>\n",
       "      <th>col_two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>[10, 40]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>[20, 50]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c</td>\n",
       "      <td>[30, 60]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  col_one   col_two\n",
       "0       a  [10, 40]\n",
       "1       b  [20, 50]\n",
       "2       c  [30, 60]"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({'col_one':['a', 'b', 'c'], 'col_two':[[10, 40], [20, 50], [30, 60]]})\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    0   1\n",
       "0  10  40\n",
       "1  20  50\n",
       "2  30  60"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_new = df.col_two.apply(pd.Series)\n",
    "df_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>col_one</th>\n",
       "      <th>col_two</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>a</td>\n",
       "      <td>[10, 40]</td>\n",
       "      <td>10</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b</td>\n",
       "      <td>[20, 50]</td>\n",
       "      <td>20</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>c</td>\n",
       "      <td>[30, 60]</td>\n",
       "      <td>30</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  col_one   col_two   0   1\n",
       "0       a  [10, 40]  10  40\n",
       "1       b  [20, 50]  20  50\n",
       "2       c  [30, 60]  30  60"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df, df_new], axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>9.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>9.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description  item_price  \n",
       "0                                                NaN        2.39  \n",
       "1                                       [Clementine]        3.39  \n",
       "2                                            [Apple]        3.39  \n",
       "3                                                NaN        2.39  \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98  \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...       10.98  \n",
       "6                                                NaN        1.69  \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...       11.75  \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...        9.25  \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...        9.25  "
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11.56"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders[orders.order_id == 1].item_price.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "order_id\n",
       "1    11.56\n",
       "2    16.98\n",
       "3    12.67\n",
       "4    21.00\n",
       "5    13.70\n",
       "Name: item_price, dtype: float64"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders.groupby(\"order_id\").item_price.sum().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>order_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11.56</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16.98</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12.67</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>21.00</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>13.70</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            sum  count\n",
       "order_id              \n",
       "1         11.56      4\n",
       "2         16.98      1\n",
       "3         12.67      2\n",
       "4         21.00      2\n",
       "5         13.70      2"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders.groupby(\"order_id\").item_price.agg(['sum', 'count']).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>3.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>10.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>9.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>9.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description  item_price  \n",
       "0                                                NaN        2.39  \n",
       "1                                       [Clementine]        3.39  \n",
       "2                                            [Apple]        3.39  \n",
       "3                                                NaN        2.39  \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98  \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...       10.98  \n",
       "6                                                NaN        1.69  \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...       11.75  \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...        9.25  \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...        9.25  "
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "order_id\n",
       "1    11.56\n",
       "2    16.98\n",
       "3    12.67\n",
       "4    21.00\n",
       "5    13.70\n",
       "Name: item_price, dtype: float64"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders.groupby('order_id').item_price.sum().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1834"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(orders.groupby('order_id').item_price.sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4622"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(orders.item_price)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4622"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_price = orders.groupby('order_id').item_price.transform('sum')\n",
    "len(total_price)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "      <th>total_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>11.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>11.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>11.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>11.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>16.98</td>\n",
       "      <td>16.98</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>10.98</td>\n",
       "      <td>12.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.69</td>\n",
       "      <td>12.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.75</td>\n",
       "      <td>21.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>9.25</td>\n",
       "      <td>21.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>9.25</td>\n",
       "      <td>13.70</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description  item_price  total_price  \n",
       "0                                                NaN        2.39        11.56  \n",
       "1                                       [Clementine]        3.39        11.56  \n",
       "2                                            [Apple]        3.39        11.56  \n",
       "3                                                NaN        2.39        11.56  \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98        16.98  \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...       10.98        12.67  \n",
       "6                                                NaN        1.69        12.67  \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...       11.75        21.00  \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...        9.25        21.00  \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...        9.25        13.70  "
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders['total_price'] = total_price\n",
    "orders.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>order_id</th>\n",
       "      <th>quantity</th>\n",
       "      <th>item_name</th>\n",
       "      <th>choice_description</th>\n",
       "      <th>item_price</th>\n",
       "      <th>total_price</th>\n",
       "      <th>percent_of_total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Fresh Tomato Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>11.56</td>\n",
       "      <td>0.206747</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Izze</td>\n",
       "      <td>[Clementine]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>11.56</td>\n",
       "      <td>0.293253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Nantucket Nectar</td>\n",
       "      <td>[Apple]</td>\n",
       "      <td>3.39</td>\n",
       "      <td>11.56</td>\n",
       "      <td>0.293253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Chips and Tomatillo-Green Chili Salsa</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.39</td>\n",
       "      <td>11.56</td>\n",
       "      <td>0.206747</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Tomatillo-Red Chili Salsa (Hot), [Black Beans...</td>\n",
       "      <td>16.98</td>\n",
       "      <td>16.98</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Chicken Bowl</td>\n",
       "      <td>[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...</td>\n",
       "      <td>10.98</td>\n",
       "      <td>12.67</td>\n",
       "      <td>0.866614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Side of Chips</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.69</td>\n",
       "      <td>12.67</td>\n",
       "      <td>0.133386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Tomatillo Red Chili Salsa, [Fajita Vegetables...</td>\n",
       "      <td>11.75</td>\n",
       "      <td>21.00</td>\n",
       "      <td>0.559524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Soft Tacos</td>\n",
       "      <td>[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...</td>\n",
       "      <td>9.25</td>\n",
       "      <td>21.00</td>\n",
       "      <td>0.440476</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>Steak Burrito</td>\n",
       "      <td>[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...</td>\n",
       "      <td>9.25</td>\n",
       "      <td>13.70</td>\n",
       "      <td>0.675182</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   order_id  quantity                              item_name  \\\n",
       "0         1         1           Chips and Fresh Tomato Salsa   \n",
       "1         1         1                                   Izze   \n",
       "2         1         1                       Nantucket Nectar   \n",
       "3         1         1  Chips and Tomatillo-Green Chili Salsa   \n",
       "4         2         2                           Chicken Bowl   \n",
       "5         3         1                           Chicken Bowl   \n",
       "6         3         1                          Side of Chips   \n",
       "7         4         1                          Steak Burrito   \n",
       "8         4         1                       Steak Soft Tacos   \n",
       "9         5         1                          Steak Burrito   \n",
       "\n",
       "                                  choice_description  item_price  total_price  \\\n",
       "0                                                NaN        2.39        11.56   \n",
       "1                                       [Clementine]        3.39        11.56   \n",
       "2                                            [Apple]        3.39        11.56   \n",
       "3                                                NaN        2.39        11.56   \n",
       "4  [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98        16.98   \n",
       "5  [Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...       10.98        12.67   \n",
       "6                                                NaN        1.69        12.67   \n",
       "7  [Tomatillo Red Chili Salsa, [Fajita Vegetables...       11.75        21.00   \n",
       "8  [Tomatillo Green Chili Salsa, [Pinto Beans, Ch...        9.25        21.00   \n",
       "9  [Fresh Tomato Salsa, [Rice, Black Beans, Pinto...        9.25        13.70   \n",
       "\n",
       "   percent_of_total  \n",
       "0          0.206747  \n",
       "1          0.293253  \n",
       "2          0.293253  \n",
       "3          0.206747  \n",
       "4          1.000000  \n",
       "5          0.866614  \n",
       "6          0.133386  \n",
       "7          0.559524  \n",
       "8          0.440476  \n",
       "9          0.675182  "
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "orders['percent_of_total'] = orders.item_price / orders.total_price\n",
    "orders.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Fare</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>714.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "      <td>891.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>446.000000</td>\n",
       "      <td>0.383838</td>\n",
       "      <td>2.308642</td>\n",
       "      <td>29.699118</td>\n",
       "      <td>0.523008</td>\n",
       "      <td>0.381594</td>\n",
       "      <td>32.204208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>257.353842</td>\n",
       "      <td>0.486592</td>\n",
       "      <td>0.836071</td>\n",
       "      <td>14.526497</td>\n",
       "      <td>1.102743</td>\n",
       "      <td>0.806057</td>\n",
       "      <td>49.693429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.420000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>223.500000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>20.125000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>7.910400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>446.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>28.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.454200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>668.500000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>31.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>891.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>512.329200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       PassengerId    Survived      Pclass         Age       SibSp  \\\n",
       "count   891.000000  891.000000  891.000000  714.000000  891.000000   \n",
       "mean    446.000000    0.383838    2.308642   29.699118    0.523008   \n",
       "std     257.353842    0.486592    0.836071   14.526497    1.102743   \n",
       "min       1.000000    0.000000    1.000000    0.420000    0.000000   \n",
       "25%     223.500000    0.000000    2.000000   20.125000    0.000000   \n",
       "50%     446.000000    0.000000    3.000000   28.000000    0.000000   \n",
       "75%     668.500000    1.000000    3.000000   38.000000    1.000000   \n",
       "max     891.000000    1.000000    3.000000   80.000000    8.000000   \n",
       "\n",
       "            Parch        Fare  \n",
       "count  891.000000  891.000000  \n",
       "mean     0.381594   32.204208  \n",
       "std      0.806057   49.693429  \n",
       "min      0.000000    0.000000  \n",
       "25%      0.000000    7.910400  \n",
       "50%      0.000000   14.454200  \n",
       "75%      0.000000   31.000000  \n",
       "max      6.000000  512.329200  "
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Fare</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.420</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>223.5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>20.125</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.9104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>446.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>28.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.4542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>668.5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>38.000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>31.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>891.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>80.000</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>512.3292</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     PassengerId  Survived  Pclass     Age  SibSp  Parch      Fare\n",
       "min          1.0       0.0     1.0   0.420    0.0    0.0    0.0000\n",
       "25%        223.5       0.0     2.0  20.125    0.0    0.0    7.9104\n",
       "50%        446.0       0.0     3.0  28.000    0.0    0.0   14.4542\n",
       "75%        668.5       1.0     3.0  38.000    1.0    0.0   31.0000\n",
       "max        891.0       1.0     3.0  80.000    8.0    6.0  512.3292"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.describe().loc['min':'max']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.420</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>2.0</td>\n",
       "      <td>20.125</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.0</td>\n",
       "      <td>28.000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.0</td>\n",
       "      <td>38.000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.0</td>\n",
       "      <td>80.000</td>\n",
       "      <td>8.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Pclass     Age  SibSp  Parch\n",
       "min     1.0   0.420    0.0    0.0\n",
       "25%     2.0  20.125    0.0    0.0\n",
       "50%     3.0  28.000    0.0    0.0\n",
       "75%     3.0  38.000    1.0    0.0\n",
       "max     3.0  80.000    8.0    6.0"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.describe().loc['min':'max', 'Pclass':'Parch']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3838383838383838"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.Survived.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sex\n",
       "female    0.742038\n",
       "male      0.188908\n",
       "Name: Survived, dtype: float64"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.groupby('Sex').Survived.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Sex     Pclass\n",
       "female  1         0.968085\n",
       "        2         0.921053\n",
       "        3         0.500000\n",
       "male    1         0.368852\n",
       "        2         0.157407\n",
       "        3         0.135447\n",
       "Name: Survived, dtype: float64"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.groupby(['Sex', 'Pclass']).Survived.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Pclass</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>female</th>\n",
       "      <td>0.968085</td>\n",
       "      <td>0.921053</td>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>male</th>\n",
       "      <td>0.368852</td>\n",
       "      <td>0.157407</td>\n",
       "      <td>0.135447</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Pclass         1         2         3\n",
       "Sex                                 \n",
       "female  0.968085  0.921053  0.500000\n",
       "male    0.368852  0.157407  0.135447"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.groupby(['Sex', 'Pclass']).Survived.mean().unstack()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Pclass</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>female</th>\n",
       "      <td>0.968085</td>\n",
       "      <td>0.921053</td>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>male</th>\n",
       "      <td>0.368852</td>\n",
       "      <td>0.157407</td>\n",
       "      <td>0.135447</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Pclass         1         2         3\n",
       "Sex                                 \n",
       "female  0.968085  0.921053  0.500000\n",
       "male    0.368852  0.157407  0.135447"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Pclass</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>female</th>\n",
       "      <td>0.968085</td>\n",
       "      <td>0.921053</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.742038</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>male</th>\n",
       "      <td>0.368852</td>\n",
       "      <td>0.157407</td>\n",
       "      <td>0.135447</td>\n",
       "      <td>0.188908</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>0.629630</td>\n",
       "      <td>0.472826</td>\n",
       "      <td>0.242363</td>\n",
       "      <td>0.383838</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Pclass         1         2         3       All\n",
       "Sex                                           \n",
       "female  0.968085  0.921053  0.500000  0.742038\n",
       "male    0.368852  0.157407  0.135447  0.188908\n",
       "All     0.629630  0.472826  0.242363  0.383838"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='mean',\n",
    "                    margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Pclass</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>female</th>\n",
       "      <td>94</td>\n",
       "      <td>76</td>\n",
       "      <td>144</td>\n",
       "      <td>314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>male</th>\n",
       "      <td>122</td>\n",
       "      <td>108</td>\n",
       "      <td>347</td>\n",
       "      <td>577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>216</td>\n",
       "      <td>184</td>\n",
       "      <td>491</td>\n",
       "      <td>891</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Pclass    1    2    3  All\n",
       "Sex                       \n",
       "female   94   76  144  314\n",
       "male    122  108  347  577\n",
       "All     216  184  491  891"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.pivot_table(index='Sex', columns='Pclass', values='Survived', aggfunc='count',\n",
    "                    margins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    22.0\n",
       "1    38.0\n",
       "2    26.0\n",
       "3    35.0\n",
       "4    35.0\n",
       "5     NaN\n",
       "6    54.0\n",
       "7     2.0\n",
       "8    27.0\n",
       "9    14.0\n",
       "Name: Age, dtype: float64"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.Age.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    young adult\n",
       "1          adult\n",
       "2          adult\n",
       "3          adult\n",
       "4          adult\n",
       "5            NaN\n",
       "6          adult\n",
       "7          child\n",
       "8          adult\n",
       "9          child\n",
       "Name: Age, dtype: category\n",
       "Categories (3, object): [child < young adult < adult]"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.cut(titanic.Age, bins=[0, 18, 25, 99], labels=['child', 'young adult', 'adult']).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.float_format', '{:.2f}'.format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.28</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.92</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.00</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.10</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male 22.00      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female 38.00      1   \n",
       "2                             Heikkinen, Miss. Laina  female 26.00      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female 35.00      1   \n",
       "4                           Allen, Mr. William Henry    male 35.00      0   \n",
       "\n",
       "   Parch            Ticket  Fare Cabin Embarked  \n",
       "0      0         A/5 21171  7.25   NaN        S  \n",
       "1      0          PC 17599 71.28   C85        C  \n",
       "2      0  STON/O2. 3101282  7.92   NaN        S  \n",
       "3      0            113803 53.10  C123        S  \n",
       "4      0            373450  8.05   NaN        S  "
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.reset_option('display.float_format')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PassengerId</th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "titanic.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Date</th>\n",
       "      <th>Close</th>\n",
       "      <th>Volume</th>\n",
       "      <th>Symbol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>31.50</td>\n",
       "      <td>14070500</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>112.52</td>\n",
       "      <td>21701800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2016-10-03</td>\n",
       "      <td>57.42</td>\n",
       "      <td>19189500</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>113.00</td>\n",
       "      <td>29736800</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>57.24</td>\n",
       "      <td>20085900</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2016-10-04</td>\n",
       "      <td>31.35</td>\n",
       "      <td>18460400</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>57.64</td>\n",
       "      <td>16726400</td>\n",
       "      <td>MSFT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>31.59</td>\n",
       "      <td>11808600</td>\n",
       "      <td>CSCO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2016-10-05</td>\n",
       "      <td>113.05</td>\n",
       "      <td>21453100</td>\n",
       "      <td>AAPL</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Date   Close    Volume Symbol\n",
       "0 2016-10-03   31.50  14070500   CSCO\n",
       "1 2016-10-03  112.52  21701800   AAPL\n",
       "2 2016-10-03   57.42  19189500   MSFT\n",
       "3 2016-10-04  113.00  29736800   AAPL\n",
       "4 2016-10-04   57.24  20085900   MSFT\n",
       "5 2016-10-04   31.35  18460400   CSCO\n",
       "6 2016-10-05   57.64  16726400   MSFT\n",
       "7 2016-10-05   31.59  11808600   CSCO\n",
       "8 2016-10-05  113.05  21453100   AAPL"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style  type=\"text/css\" >\n",
       "</style>  \n",
       "<table id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152e\" > \n",
       "<thead>    <tr> \n",
       "        <th class=\"blank level0\" ></th> \n",
       "        <th class=\"col_heading level0 col0\" >Date</th> \n",
       "        <th class=\"col_heading level0 col1\" >Close</th> \n",
       "        <th class=\"col_heading level0 col2\" >Volume</th> \n",
       "        <th class=\"col_heading level0 col3\" >Symbol</th> \n",
       "    </tr></thead> \n",
       "<tbody>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row0\" class=\"row_heading level0 row0\" >0</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow0_col0\" class=\"data row0 col0\" >10/03/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow0_col1\" class=\"data row0 col1\" >$31.50</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow0_col2\" class=\"data row0 col2\" >14,070,500</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow0_col3\" class=\"data row0 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row1\" class=\"row_heading level0 row1\" >1</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow1_col0\" class=\"data row1 col0\" >10/03/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow1_col1\" class=\"data row1 col1\" >$112.52</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow1_col2\" class=\"data row1 col2\" >21,701,800</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow1_col3\" class=\"data row1 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row2\" class=\"row_heading level0 row2\" >2</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow2_col0\" class=\"data row2 col0\" >10/03/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow2_col1\" class=\"data row2 col1\" >$57.42</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow2_col2\" class=\"data row2 col2\" >19,189,500</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow2_col3\" class=\"data row2 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row3\" class=\"row_heading level0 row3\" >3</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow3_col0\" class=\"data row3 col0\" >10/04/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow3_col1\" class=\"data row3 col1\" >$113.00</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow3_col2\" class=\"data row3 col2\" >29,736,800</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow3_col3\" class=\"data row3 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row4\" class=\"row_heading level0 row4\" >4</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow4_col0\" class=\"data row4 col0\" >10/04/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow4_col1\" class=\"data row4 col1\" >$57.24</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow4_col2\" class=\"data row4 col2\" >20,085,900</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow4_col3\" class=\"data row4 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row5\" class=\"row_heading level0 row5\" >5</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow5_col0\" class=\"data row5 col0\" >10/04/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow5_col1\" class=\"data row5 col1\" >$31.35</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow5_col2\" class=\"data row5 col2\" >18,460,400</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow5_col3\" class=\"data row5 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row6\" class=\"row_heading level0 row6\" >6</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow6_col0\" class=\"data row6 col0\" >10/05/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow6_col1\" class=\"data row6 col1\" >$57.64</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow6_col2\" class=\"data row6 col2\" >16,726,400</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow6_col3\" class=\"data row6 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row7\" class=\"row_heading level0 row7\" >7</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow7_col0\" class=\"data row7 col0\" >10/05/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow7_col1\" class=\"data row7 col1\" >$31.59</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow7_col2\" class=\"data row7 col2\" >11,808,600</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow7_col3\" class=\"data row7 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <th id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152elevel0_row8\" class=\"row_heading level0 row8\" >8</th> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow8_col0\" class=\"data row8 col0\" >10/05/16</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow8_col1\" class=\"data row8 col1\" >$113.05</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow8_col2\" class=\"data row8 col2\" >21,453,100</td> \n",
       "        <td id=\"T_2224f1b0_4e5f_11ea_9a7d_f2189878152erow8_col3\" class=\"data row8 col3\" >AAPL</td> \n",
       "    </tr></tbody> \n",
       "</table> "
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x121050b00>"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "format_dict = {'Date':'{:%m/%d/%y}', 'Close':'${:.2f}', 'Volume':'{:,}'}\n",
    "stocks.style.format(format_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style  type=\"text/css\" >\n",
       "    #T_978d77cc_4e5f_11ea_bc64_f2189878152erow5_col1 {\n",
       "            background-color:  red;\n",
       "            : ;\n",
       "        }    #T_978d77cc_4e5f_11ea_bc64_f2189878152erow8_col1 {\n",
       "            : ;\n",
       "            background-color:  lightgreen;\n",
       "        }</style>  \n",
       "<table id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152e\" > \n",
       "<thead>    <tr> \n",
       "        <th class=\"col_heading level0 col0\" >Date</th> \n",
       "        <th class=\"col_heading level0 col1\" >Close</th> \n",
       "        <th class=\"col_heading level0 col2\" >Volume</th> \n",
       "        <th class=\"col_heading level0 col3\" >Symbol</th> \n",
       "    </tr></thead> \n",
       "<tbody>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow0_col0\" class=\"data row0 col0\" >10/03/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow0_col1\" class=\"data row0 col1\" >$31.50</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow0_col2\" class=\"data row0 col2\" >14,070,500</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow0_col3\" class=\"data row0 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow1_col0\" class=\"data row1 col0\" >10/03/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow1_col1\" class=\"data row1 col1\" >$112.52</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow1_col2\" class=\"data row1 col2\" >21,701,800</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow1_col3\" class=\"data row1 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow2_col0\" class=\"data row2 col0\" >10/03/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow2_col1\" class=\"data row2 col1\" >$57.42</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow2_col2\" class=\"data row2 col2\" >19,189,500</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow2_col3\" class=\"data row2 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow3_col0\" class=\"data row3 col0\" >10/04/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow3_col1\" class=\"data row3 col1\" >$113.00</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow3_col2\" class=\"data row3 col2\" >29,736,800</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow3_col3\" class=\"data row3 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow4_col0\" class=\"data row4 col0\" >10/04/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow4_col1\" class=\"data row4 col1\" >$57.24</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow4_col2\" class=\"data row4 col2\" >20,085,900</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow4_col3\" class=\"data row4 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow5_col0\" class=\"data row5 col0\" >10/04/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow5_col1\" class=\"data row5 col1\" >$31.35</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow5_col2\" class=\"data row5 col2\" >18,460,400</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow5_col3\" class=\"data row5 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow6_col0\" class=\"data row6 col0\" >10/05/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow6_col1\" class=\"data row6 col1\" >$57.64</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow6_col2\" class=\"data row6 col2\" >16,726,400</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow6_col3\" class=\"data row6 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow7_col0\" class=\"data row7 col0\" >10/05/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow7_col1\" class=\"data row7 col1\" >$31.59</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow7_col2\" class=\"data row7 col2\" >11,808,600</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow7_col3\" class=\"data row7 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow8_col0\" class=\"data row8 col0\" >10/05/16</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow8_col1\" class=\"data row8 col1\" >$113.05</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow8_col2\" class=\"data row8 col2\" >21,453,100</td> \n",
       "        <td id=\"T_978d77cc_4e5f_11ea_bc64_f2189878152erow8_col3\" class=\"data row8 col3\" >AAPL</td> \n",
       "    </tr></tbody> \n",
       "</table> "
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x121c7bfd0>"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(stocks.style.format(format_dict)\n",
    " .hide_index()\n",
    " .highlight_min('Close', color='red')\n",
    " .highlight_max('Close', color='lightgreen')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style  type=\"text/css\" >\n",
       "    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow0_col2 {\n",
       "            background-color:  #deebf7;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow1_col2 {\n",
       "            background-color:  #5aa2cf;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow2_col2 {\n",
       "            background-color:  #8fc2de;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow3_col2 {\n",
       "            background-color:  #08306b;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow4_col2 {\n",
       "            background-color:  #7ab6d9;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow5_col2 {\n",
       "            background-color:  #a0cbe2;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow6_col2 {\n",
       "            background-color:  #bed8ec;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow7_col2 {\n",
       "            background-color:  #f7fbff;\n",
       "        }    #T_fa88b0dc_4e5f_11ea_a010_f2189878152erow8_col2 {\n",
       "            background-color:  #5fa6d1;\n",
       "        }</style>  \n",
       "<table id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152e\" > \n",
       "<thead>    <tr> \n",
       "        <th class=\"col_heading level0 col0\" >Date</th> \n",
       "        <th class=\"col_heading level0 col1\" >Close</th> \n",
       "        <th class=\"col_heading level0 col2\" >Volume</th> \n",
       "        <th class=\"col_heading level0 col3\" >Symbol</th> \n",
       "    </tr></thead> \n",
       "<tbody>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow0_col0\" class=\"data row0 col0\" >10/03/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow0_col1\" class=\"data row0 col1\" >$31.50</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow0_col2\" class=\"data row0 col2\" >14,070,500</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow0_col3\" class=\"data row0 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow1_col0\" class=\"data row1 col0\" >10/03/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow1_col1\" class=\"data row1 col1\" >$112.52</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow1_col2\" class=\"data row1 col2\" >21,701,800</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow1_col3\" class=\"data row1 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow2_col0\" class=\"data row2 col0\" >10/03/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow2_col1\" class=\"data row2 col1\" >$57.42</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow2_col2\" class=\"data row2 col2\" >19,189,500</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow2_col3\" class=\"data row2 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow3_col0\" class=\"data row3 col0\" >10/04/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow3_col1\" class=\"data row3 col1\" >$113.00</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow3_col2\" class=\"data row3 col2\" >29,736,800</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow3_col3\" class=\"data row3 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow4_col0\" class=\"data row4 col0\" >10/04/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow4_col1\" class=\"data row4 col1\" >$57.24</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow4_col2\" class=\"data row4 col2\" >20,085,900</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow4_col3\" class=\"data row4 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow5_col0\" class=\"data row5 col0\" >10/04/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow5_col1\" class=\"data row5 col1\" >$31.35</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow5_col2\" class=\"data row5 col2\" >18,460,400</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow5_col3\" class=\"data row5 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow6_col0\" class=\"data row6 col0\" >10/05/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow6_col1\" class=\"data row6 col1\" >$57.64</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow6_col2\" class=\"data row6 col2\" >16,726,400</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow6_col3\" class=\"data row6 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow7_col0\" class=\"data row7 col0\" >10/05/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow7_col1\" class=\"data row7 col1\" >$31.59</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow7_col2\" class=\"data row7 col2\" >11,808,600</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow7_col3\" class=\"data row7 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow8_col0\" class=\"data row8 col0\" >10/05/16</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow8_col1\" class=\"data row8 col1\" >$113.05</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow8_col2\" class=\"data row8 col2\" >21,453,100</td> \n",
       "        <td id=\"T_fa88b0dc_4e5f_11ea_a010_f2189878152erow8_col3\" class=\"data row8 col3\" >AAPL</td> \n",
       "    </tr></tbody> \n",
       "</table> "
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x121c7ba90>"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(stocks.style.format(format_dict)\n",
    " .hide_index()\n",
    " .background_gradient(subset='Volume', cmap='Blues')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style  type=\"text/css\" >\n",
       "    #T_3e085934_4e60_11ea_84a0_f2189878152erow0_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 73.7%, transparent 73.7%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow1_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 86.5%, transparent 86.5%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow2_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 82.3%, transparent 82.3%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow3_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 100.0%, transparent 100.0%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow4_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 83.8%, transparent 83.8%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow5_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 81.0%, transparent 81.0%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow6_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 78.1%, transparent 78.1%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow7_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 69.9%, transparent 69.9%);\n",
       "        }    #T_3e085934_4e60_11ea_84a0_f2189878152erow8_col2 {\n",
       "            width:  10em;\n",
       "             height:  80%;\n",
       "            background:  linear-gradient(90deg, transparent 0%, transparent 50%, lightblue 50%, lightblue 86.1%, transparent 86.1%);\n",
       "        }</style>  \n",
       "<table id=\"T_3e085934_4e60_11ea_84a0_f2189878152e\" ><caption>Stock Prices from October 2016</caption> \n",
       "<thead>    <tr> \n",
       "        <th class=\"col_heading level0 col0\" >Date</th> \n",
       "        <th class=\"col_heading level0 col1\" >Close</th> \n",
       "        <th class=\"col_heading level0 col2\" >Volume</th> \n",
       "        <th class=\"col_heading level0 col3\" >Symbol</th> \n",
       "    </tr></thead> \n",
       "<tbody>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow0_col0\" class=\"data row0 col0\" >10/03/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow0_col1\" class=\"data row0 col1\" >$31.50</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow0_col2\" class=\"data row0 col2\" >14,070,500</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow0_col3\" class=\"data row0 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow1_col0\" class=\"data row1 col0\" >10/03/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow1_col1\" class=\"data row1 col1\" >$112.52</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow1_col2\" class=\"data row1 col2\" >21,701,800</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow1_col3\" class=\"data row1 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow2_col0\" class=\"data row2 col0\" >10/03/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow2_col1\" class=\"data row2 col1\" >$57.42</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow2_col2\" class=\"data row2 col2\" >19,189,500</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow2_col3\" class=\"data row2 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow3_col0\" class=\"data row3 col0\" >10/04/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow3_col1\" class=\"data row3 col1\" >$113.00</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow3_col2\" class=\"data row3 col2\" >29,736,800</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow3_col3\" class=\"data row3 col3\" >AAPL</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow4_col0\" class=\"data row4 col0\" >10/04/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow4_col1\" class=\"data row4 col1\" >$57.24</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow4_col2\" class=\"data row4 col2\" >20,085,900</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow4_col3\" class=\"data row4 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow5_col0\" class=\"data row5 col0\" >10/04/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow5_col1\" class=\"data row5 col1\" >$31.35</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow5_col2\" class=\"data row5 col2\" >18,460,400</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow5_col3\" class=\"data row5 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow6_col0\" class=\"data row6 col0\" >10/05/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow6_col1\" class=\"data row6 col1\" >$57.64</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow6_col2\" class=\"data row6 col2\" >16,726,400</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow6_col3\" class=\"data row6 col3\" >MSFT</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow7_col0\" class=\"data row7 col0\" >10/05/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow7_col1\" class=\"data row7 col1\" >$31.59</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow7_col2\" class=\"data row7 col2\" >11,808,600</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow7_col3\" class=\"data row7 col3\" >CSCO</td> \n",
       "    </tr>    <tr> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow8_col0\" class=\"data row8 col0\" >10/05/16</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow8_col1\" class=\"data row8 col1\" >$113.05</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow8_col2\" class=\"data row8 col2\" >21,453,100</td> \n",
       "        <td id=\"T_3e085934_4e60_11ea_84a0_f2189878152erow8_col3\" class=\"data row8 col3\" >AAPL</td> \n",
       "    </tr></tbody> \n",
       "</table> "
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x121d962b0>"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(stocks.style.format(format_dict)\n",
    " .hide_index()\n",
    " .bar('Volume', color='lightblue', align='zero')\n",
    " .set_caption('Stock Prices from October 2016')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tab(children=(HTML(value='<div id=\"overview-content\" class=\"row variable spacing\">\\n    <div class=\"row\">\\n   …"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "Report generated with <a href=\"https://github.com/pandas-profiling/pandas-profiling\">pandas-profiling</a>."
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas_profiling\n",
    "pandas_profiling.ProfileReport(titanic)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
